Chapter 4 MAGs overview
4.1 MAGs phylogeny
# Which phylum the MAG belongs to
phyla <- ehi_phylum_colors %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique()
# What is the genome size of the MAG in MBs (megabases)
lengths <- genome_metadata %>%
select(c(genome,length)) %>%
mutate(length=round(length/1000000,2))
# What is the completeness of the MAG
mag_completeness <- genome_metadata %>%
select(c(genome,completeness)) %>%
as.data.frame() %>%
remove_rownames() %>%
column_to_rownames(var = "genome")
# Generate the phylum color heatmap
phylum_heatmap <- ehi_phylum_colors %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(genome,phylum) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome")
# Create baseline circular genome tree
circular_tree <- force.ultrametric(genome_tree,method="extend") %>%
ggtree(., layout = 'circular', size = 0.1, angle=45) +
xlim(-1, NA)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
# Add phylum colors ring
circular_tree <- gheatmap(circular_tree, phylum_heatmap, offset=0.05, width=0.3, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic, name="Phylum") +
#geom_tiplab2(size=1, hjust=-0.1) +
theme(legend.position = "right", plot.margin = margin(0, 0, 0, 0), panel.margin = margin(0, 0, 0, 0))
# Flush color scale to enable a new color scheme in the next ring
circular_tree <- circular_tree + new_scale_fill()
# Add completeness ring
circular_tree <- circular_tree +
new_scale_fill() +
scale_fill_gradient(low = "#d1f4ba", high = "#f4baba", name="Genome\ncontamination") +
geom_fruit(
data=genome_metadata,
geom=geom_bar,
mapping = aes(x=completeness, y=genome, fill=contamination),
offset = 0.55,
orientation="y",
stat="identity")
# Add genome-size ring
circular_tree <- circular_tree +
new_scale_fill() +
scale_fill_manual(values = "#cccccc") +
geom_fruit(
data=lengths,
geom=geom_bar,
mapping = aes(x=length, y=genome),
offset = 0.05,
orientation="y",
stat="identity")
#Plot circular tree
circular_tree4.2 Genome quality
[1] 84.89347
[1] 15.4699
[1] 2.013071
[1] 2.10322
#create input table from original genome table
genome_details <- genome_metadata %>%
select(c(genome,domain,phylum,completeness,contamination,length)) %>%
mutate(length=round(length/1000000,2)) %>% #change length to MBs
rename(comp=completeness,cont=contamination,size=length) %>% #rename columns
remove_rownames() %>%
arrange(match(genome, rev(genome_tree$tip.label))) #sort MAGs according to phylogenetic tree
#generate genome quality biplot
genome_stats_biplot <- genome_details %>%
ggplot(aes(x=comp,y=cont,size=size,color=phylum)) +
geom_point(alpha=0.7) +
ylim(c(10,0)) +
scale_color_manual(values=colors_alphabetic) +
labs(y= "Contamination", x = "Completeness") +
theme_classic() +
theme(legend.position = "none")
#generate contamination boxplot
genome_stats_cont <- genome_details %>%
ggplot(aes(y=cont)) +
ylim(c(10,0)) +
geom_boxplot(colour = "#999999", fill="#cccccc") +
theme_void() +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin = unit(c(0, 0, 0.40, 0),"inches")) #add bottom-margin (top, right, bottom, left)
#generate completeness boxplot
genome_stats_comp <-genome_details %>%
ggplot(aes(x=comp)) +
xlim(c(50,100)) +
geom_boxplot(colour = "#999999", fill="#cccccc") +
theme_void() +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin = unit(c(0, 0, 0, 0.50),"inches")) #add left-margin (top, right, bottom, left)
#create composite figure
grid.arrange(grobs = list(genome_stats_comp,genome_stats_biplot,genome_stats_cont),
layout_matrix = rbind(c(1,1,1,1,1,1,1,1,1,1,1,4),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3)))4.3 Functional attributes of MAGs
#Generate a basal utrametric tree for the sake of visualisation
gift_tree <- force.ultrametric(genome_tree,method="extend") %>%
ggtree(., size = 0.3)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
gift_tree <- gheatmap(gift_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE, color=NA) +
scale_fill_manual(values=colors_alphabetic) +
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
gift_tree <- gift_tree + new_scale_fill()
gift_table <- genome_gifts %>%
column_to_rownames(var="genome")
#Add functions heatmap
gift_tree <- gheatmap(gift_tree, gift_table, offset=0.5, width=3.5, colnames=FALSE, color=NA) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white")+
labs(fill="GIFT")
#Reset fill scale to use a different colour profile in the heatmap
gift_tree <- gift_tree + new_scale_fill()
# Add completeness barplots
gift_tree <- gift_tree +
geom_fruit(data=genome_metadata,
geom=geom_bar,
#grid.params=list(axis="x", text.size=2, nbreak = 1),
axis.params=list(vline=TRUE),
mapping = aes(x=length, y=genome, fill=completeness),
offset = 3.8,
orientation="y",
stat="identity") +
scale_fill_gradient(low = "#cf8888", high = "#a2cc87") +
labs(fill="Genome\ncompleteness")
#Plot combined tree + heatmap
gift_tree +
theme(legend.position='right')4.4 Functional ordination of MAGs (distillr)
distillr_table <- genome_gifts %>%
column_to_rownames(var="genome")
# Generate the tSNE ordination
tSNE_func2 <- Rtsne(X=distillr_table, dims = 2, check_duplicates = FALSE)
# Plot the ordination
tSNE_func2$Y %>%
as.data.frame() %>%
mutate(genome=rownames(distillr_table)) %>%
inner_join(genome_metadata, by="genome") %>%
rename(tSNE1="V1", tSNE2="V2") %>%
select(genome,phylum,tSNE1,tSNE2, completeness) %>%
ggplot(aes(x = tSNE1, y = tSNE2, color = phylum, size=completeness))+
geom_point(shape=16, alpha=0.7) +
scale_color_manual(values=colors_alphabetic) +
theme_minimal() +
theme(legend.position = "right")